# CLEAN WORKSPACE AND LOAD PACKAGES --------------------------------------------
rm(list = ls())
library(spmirt)
library(ggplot2)
library(datasim)
library(tidyverse)
# SIMULATE DATA ----------------------------------------------------------------
n <- 300
f <- list(
prob ~ I(0) +
gp(list(s1, s2), cor.model = "exp_cor", cor.params = list(phi = 0.04),
sigma2 = 1),
size ~ I(1)
)
data <- sim_model(formula = f,
link_inv = list(pnorm, identity),
generator = rbinom,
n = n,
seed = 2
)
data <- dplyr::rename(data, gp = gp.list.prob)
ggplot(data, aes(s1, s2)) +
geom_point(aes(col = gp, size = gp)) +
scale_colour_distiller(palette = "RdYlBu")

ggplot(data, aes(s1, s2)) +
geom_point(aes(col = factor(response)), size = 2)

data$gp2 = data$gp + rnorm(300)
vg <- gstat::variogram(gp2 ~ 1, ~ s1 + s2, data, cutoff = 0.7, width = 0.005)
ggplot(vg, aes(dist, gamma, weight = np)) +
geom_point(aes(size = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, 0.7))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

vg <- gstat::variogram(response ~ 1, ~ s1 + s2, data, cutoff = 0.7, width = 0.005)
ggplot(vg, aes(dist, gamma)) +
geom_point(aes(size = np)) +
geom_smooth() +
expand_limits(y = 0, x = 0) +
scale_x_continuous(limits = c(0, 0.7))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

noise <- rnorm(n)
# RUN MODELS -------------------------------------------------------------------
Rcpp::sourceCpp("../src/gp-gibss-adap.cpp")
source("../R/ggplot-mcmc.R")
# 50000 -> 17 minutes
# First naive adaptive
iter <- 5 * 10 ^ 4
dist <- as.matrix(dist(dplyr::select(data, s1, s2)))
sigma_prop <- matrix(c(0.138, -0.023, -0.023, 0.1), 2) * 2.38 ^ 2 / 2
system.time(
samples <- probit_gp_adap(data$response, dist, c(log(1), log(0.04)), iter,
sigma_prop)
)
## user system elapsed
## 665.973 509.216 293.875
samples_tib <- as_tibble.spmirt.list(samples, iter/2, select = "param")
summary(samples_tib)
## # A tibble: 2 x 6
## Parameters `2.5%` `10%` `50%` `90%` `97.5%`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 V1 0.539 0.687 1.10 1.77 2.31
## 2 V2 0.0183 0.0226 0.0342 0.0505 0.0623
samples_long <- gather(samples_tib)
as_tibble.spmirt.list(samples, 0, 1, "param") %>%
gg_trace(alpha = 0.6, wrap = TRUE)

as_tibble.spmirt.list(samples, iter/2, 1, "param") %>%
gg_density(alpha = 0.5)

as_tibble.spmirt.list(samples, iter/2, 15, "param") %>%
gg_density2d(V1, V2)

as_tibble.spmirt.list(samples, iter/2, 15, "param") %>%
gg_density2d(log(V1), log(V2))

nrow(unique(samples_tib)) / nrow(samples_tib)
## [1] 0.1924
coda::effectiveSize(log(samples_tib))
## V1 V2
## 433.6714 1068.1642
acf(samples_tib)

cov(log(samples_tib))
## V1 V2
## V1 0.13624702 -0.01939658
## V2 -0.01939658 0.09665395
# Adaptive with stochastic approximation series
system.time(
samples2 <- probit_gp_am(data$response, dist, c(log(1), log(0.04)), iter,
sigma_prop)
)
## user system elapsed
## 543.614 366.832 227.698
samples_tib2 <- as_tibble.spmirt.list(samples2, iter/2, select = "param")
summary(samples_tib2)
## # A tibble: 2 x 6
## Parameters `2.5%` `10%` `50%` `90%` `97.5%`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 V1 0.540 0.699 1.08 1.78 2.31
## 2 V2 0.0181 0.0226 0.0342 0.0506 0.0634
samples_long2 <- gather(samples_tib2)
as_tibble.spmirt.list(samples2, 0, 1, "param") %>%
gg_trace(alpha = 0.6, wrap = TRUE)

as_tibble.spmirt.list(samples2, iter/2, 1, "param") %>%
gg_density(alpha = 0.5)

as_tibble.spmirt.list(samples2, iter/2, 15, "param") %>%
gg_density2d(V1, V2)

as_tibble.spmirt.list(samples2, iter/2, 15, "param") %>%
gg_density2d(log(V1), log(V2))

nrow(unique(samples_tib2)) / nrow(samples_tib2)
## [1] 0.16828
coda::effectiveSize(log(samples_tib2))
## V1 V2
## 423.8379 1083.1148
acf(samples_tib2)

cov(log(samples_tib2))
## V1 V2
## V1 0.13389773 -0.02155069
## V2 -0.02155069 0.10361964
# Adaptive with stochastic approximation series
system.time(
samples3 <- probit_gp_am_scale(data$response, dist, c(log(1), log(0.04)), iter,
sigma_prop, 0.35)
)
## user system elapsed
## 548.787 373.370 230.650
samples_tib3 <- as_tibble.spmirt.list(samples3, iter/2, select = "param")
summary(samples_tib3)
## # A tibble: 2 x 6
## Parameters `2.5%` `10%` `50%` `90%` `97.5%`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 V1 0.531 0.680 1.08 1.76 2.23
## 2 V2 0.0182 0.0228 0.0342 0.0509 0.0628
samples_long3 <- gather(samples_tib3)
as_tibble.spmirt.list(samples3, 0, 1, "param") %>%
gg_trace(alpha = 0.6, wrap = TRUE)

as_tibble.spmirt.list(samples3, iter/2, 1, "param") %>%
gg_density(alpha = 0.5)

as_tibble.spmirt.list(samples3, iter/2, 15, "param") %>%
gg_density2d(V1, V2)

as_tibble.spmirt.list(samples3, iter/2, 15, "param") %>%
gg_density2d(log(V1), log(V2))

nrow(unique(samples_tib3)) / nrow(samples_tib3)
## [1] 0.34392
coda::effectiveSize(log(samples_tib3))
## V1 V2
## 684.2511 1249.9670
acf(samples_tib3)

cov(log(samples_tib3))
## V1 V2
## V1 0.1336862 -0.01520090
## V2 -0.0152009 0.09656268
# V1 V2
# 671.1306 1195.5363
# system.time(
# out <- probit_gp_chol2(data$response, dist, c(log(1), log(0.05)), iter, sigma_prop)
# )
# system.time(
# out0 <- probit_gp(data$response, dist, c(log(1), log(0.05)), iter, sigma_prop)
# )
# plot(out$param[, 2])
# abline(h = 0.03, col = 2)
# # Compare priors with posteriors
# # phi
# hist(exp(rnorm(50000, log(0.03), 0.4)), 200, freq = FALSE )
#
# system.time(
# out <- probit_gp_chol2(data$response, dist, c(log(1), log(0.05)), iter, sigma_prop)
# )
# system.time(
# out0 <- probit_gp(data$response, dist, c(log(1), log(0.05)), iter, sigma_prop)
# )
# plot(out$param[, 2])
# abline(h = 0.03, col = 2)
# # Compare priors with posteriors
# # phi
# hist(exp(rnorm(50000, log(0.03), 0.4)), 200, freq = FALSE )
# lines(density(out$param[(iter/2):iter, 2]), col = 2)
# quantile(out$param[(iter/2):iter, 2], c(0.025, 0.975))
#
# # sigma^2
# hist(exp(rnorm(50000, log(1), 0.4)), 200, freq = FALSE, xlim = c(0, 5))
# # hist(exp(rnorm(5000, log(1), 0.3)), 100, freq = FALSE)
# lines(density(out$param[(iter/2):iter, 1]), col = 2)
# quantile(out$param[(iter/2):iter, 1], c(0.025, 0.975))
#
# # phi
# hist(exp(rnorm(50000, log(0.03), 0.4)), 200, freq = FALSE )
# lines(density(out$param[(iter/2):iter, 2]), col = 2)
# quantile(out$param[(iter/2):iter, 2], c(0.025, 0.975))
#
# # sigma^2
# hist(exp(rnorm(50000, log(1), 0.4)), 200, freq = FALSE, xlim = c(0, 5))
# # hist(exp(rnorm(5000, log(1), 0.3)), 100, freq = FALSE)
# lines(density(out$param[(iter/2):iter, 1]), col = 2)
# quantile(out$param[(iter/2):iter, 1], c(0.025, 0.975))
#
# acf(out$param[(iter/2):iter, 1])
# acf(out$param[(iter/2):iter, 2])
#
# acf(out$param[seq((iter/2), iter, 10), 1])
# # acf(out$param[seq((iter/2), iter, 10), 2])
#